********************************************************************************
*** Data Paper (MIPD Release 2.0): generate the state-level problem disproportionality data
***
*** Created: 10/23/23
***
********************************************************************************

* Load the MIPD release; everything below works on a long (one row per
* quasi-response) version of this file.
use "MIPD -- Release 2.0.dta", clear

********************************************************************************
*** State-level variation
********************************************************************************
* Running respondent counter within each study; together with studyid it
* identifies a respondent for the reshape below.
bys studyid: gen obsid = _n

keep state studyid obsid mipcode*_r1

* Reshape the data: strip the _r1 suffix from every numbered slot so that the
* slot number becomes the j index of the reshape (no hard-coded slot count)
rename mipcode#_r1 mipcode#

reshape long mipcode, i(studyid obsid) j(quasi)

* Drop if it is missing that quasi-response
drop if missing(mipcode)

* Keep if it has state values (999 and any missing code, including extended
* missing values .a-.z, are treated as "no state")
drop if missing(state) | state == 999

* Create general categories: the hundreds digit of the specific code
* NOTE(review): round() sends codes whose last two digits are 50-99 to the NEXT
* category; if such codes exist, floor(mipcode / 100) is what is wanted - confirm
* against the codebook.
gen cat = round(mipcode, 100)
replace cat = (cat / 100)

cap lab drop cat
lab def cat 1 "Economy" 2 "Social Policy" 3 "Rights" 4 "Public Safety" 5 "Fiscal Policy" 6 "Foreign Policy" 7 "International Economics" 8 "Environment" 9 "Morality" 10 "Politics" 11 "Youth" 12 "Groups" 13 "Other" 14 "None" 15 "Don't Know"
lab val cat cat

* Create a simple weight variable so each respondent is equal (no matter how many quasi-responses).
* _N is the number of rows the respondent still has after the drops above, i.e.
* the number of non-missing quasi-responses. Using the highest slot number
* instead would under-weight respondents whose answers skip a slot.
bys studyid obsid: gen weight = 1 / _N


********************************************************************************
*** Calculate the biggest categories across states
********************************************************************************

* Generate the national average (specific categories)
* p = weighted percentage of all quasi-responses that carry this row's
* specific code (mipcode). A by-group total replaces the per-code loop, and the
* scratch variables are dropped right away so they are not carried along
* (and saved) with the data.
tempvar denom num

* Total weight in the whole sample
egen `denom' = total(weight)

* Total weight of the rows sharing this row's specific code
bys mipcode: egen `num' = total(weight)

gen p = 100 * (`num' / `denom')

drop `denom' `num'

* Generate the national average (general categories)
* p_c = weighted percentage of all quasi-responses that fall in this row's
* general category (cat). A by-group total replaces the per-category loop, and
* the scratch variables are dropped right away so they are not carried along
* (and saved) with the data.
tempvar denom num

* Total weight in the whole sample
egen `denom' = total(weight)

* Total weight of the rows sharing this row's general category
bys cat: egen `num' = total(weight)

gen p_c = 100 * (`num' / `denom')

drop `denom' `num'

* Most common quasi-response in each state?
* perc = weighted percentage of the state's quasi-responses that carry this
* row's specific code (mipcode). By-group totals replace the per-code loop, and
* the scratch variables are dropped right away so they are not carried along
* (and saved) with the data.
tempvar denom num

* Total weight within the state
bys state: egen `denom' = total(weight)

* Total weight of the state's rows sharing this row's specific code
bys state mipcode: egen `num' = total(weight)

gen perc = 100 * (`num' / `denom')

drop `denom' `num'

* Generate the national standard deviation (specific categories)
* sd = standard deviation, across states, of the state-level share (perc) of
* each specific code. It is computed on one row per state x code so that every
* state counts once, reduced to a one-row-per-code lookup table, and attached
* with an explicit many-to-one merge (the using file is unique on mipcode).
* NOTE(review): a state in which a code never occurs has no row here, so it is
* left out of the SD rather than entering as a 0% share - confirm this is the
* intended definition.
preserve
	* perc is constant within state x code, so any one row per pair will do
	bys state mipcode: keep if _n == 1

	bys mipcode: egen sd = sd(perc)

	* Collapse to a lookup table: one row per specific code
	bys mipcode: keep if _n == 1
	keep mipcode sd

	tempfile sd
	save `sd', replace
restore

* Every code in memory was present when the lookup was built, so anything
* other than a full match signals a problem
merge m:1 mipcode using `sd', keepusing(sd) assert(match) nogenerate

* Disproportionality: state share minus national share, in units of the
* cross-state standard deviation
*gen disp = (perc - p)
gen disp = (perc - p) / sd

* Most common general category in each state?
* perc_c = weighted percentage of the state's quasi-responses that fall in this
* row's general category (cat). By-group totals replace the per-category loop,
* and the scratch variables are dropped right away so they are not carried
* along (and saved) with the data.
tempvar denom num

* Total weight within the state
bys state: egen `denom' = total(weight)

* Total weight of the state's rows sharing this row's general category
bys state cat: egen `num' = total(weight)

gen perc_c = 100 * (`num' / `denom')

drop `denom' `num'

* Generate the national standard deviation (general categories)
* sd_c = standard deviation, across states, of the state-level share (perc_c)
* of each general category. It is computed on one row per state x category,
* reduced to a one-row-per-category lookup table, and attached with an explicit
* many-to-one merge (the using file is unique on cat).
* NOTE(review): a state in which a category never occurs has no row here, so it
* is left out of the SD rather than entering as a 0% share - confirm this is
* the intended definition.
preserve
	* perc_c is constant within state x category, so any one row per pair will do
	bys state cat: keep if _n == 1

	bys cat: egen sd_c = sd(perc_c)

	* Collapse to a lookup table: one row per general category
	bys cat: keep if _n == 1
	keep cat sd_c

	tempfile sd_c
	save `sd_c', replace
restore

* Every category in memory was present when the lookup was built, so anything
* other than a full match signals a problem
merge m:1 cat using `sd_c', keepusing(sd_c) assert(match) nogenerate

* Disproportionality: state share minus national share, in units of the
* cross-state standard deviation
gen disp_c = (perc_c - p_c) / sd_c

* Keep only the named analysis variables: temporary variables still in memory
* at this point would otherwise be written to the output file
keep state studyid obsid quasi mipcode cat weight p p_c perc sd disp perc_c sd_c disp_c

save "State Disproportionality.dta", replace
